package com.xm.spider;

import java.util.HashSet;
import java.util.Set;

/**
 * Collection of Chinese and English punctuation characters.
 *
 * <p>Every accessor builds and returns a brand-new mutable {@link HashSet}, so callers
 * may modify the result without affecting later calls.
 *
 * <p>All non-ASCII characters are written as Unicode escapes and described by their
 * Unicode name. Many of these characters are visually confusable (for example the
 * fullwidth tilde U+FF5E and the wave dash U+301C), and escapes also make the source
 * independent of the encoding used to compile it.
 */
public class Punctuation {
    /**
     * Prints the number of known punctuation characters followed by the characters
     * themselves.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Build the set once instead of once per println.
        Set<Character> all = getAllPunctuationSet();
        // The label text is Chinese for "N in total", spelled with Unicode escapes.
        System.out.println("\u5171\u8BA1" + all.size() + "\u4E2A");
        System.out.println(all.toString());
    }

    /**
     * Returns the union of the Chinese and the English punctuation sets.
     *
     * @return a new mutable set containing every character of
     *         {@link #getChinesePunctuationSet()} and {@link #getEnglishPunctuationSet()}
     */
    public static Set<Character> getAllPunctuationSet() {
        Set<Character> punctuationSet = new HashSet<>();
        punctuationSet.addAll(getChinesePunctuationSet());
        punctuationSet.addAll(getEnglishPunctuationSet());
        return punctuationSet;
    }

    /**
     * Returns the punctuation characters commonly found in Chinese text: fullwidth
     * forms, a few half-width ASCII marks, general punctuation (quotes, dash, ellipsis)
     * and CJK symbols.
     *
     * @return a new mutable set of Chinese punctuation characters
     */
    public static Set<Character> getChinesePunctuationSet() {
        Set<Character> punctuationSet = new HashSet<>();

        // Fullwidth forms (U+FF00 block).
        addRange(punctuationSet, '\uFF0C', '\uFF0F'); // comma, hyphen-minus, full stop, solidus
        addRange(punctuationSet, '\uFF1A', '\uFF1B'); // colon, semicolon
        addRange(punctuationSet, '\uFF3B', '\uFF3D'); // left square bracket, reverse solidus, right square bracket
        addRange(punctuationSet, '\uFF5B', '\uFF5D'); // left curly bracket, vertical line, right curly bracket
        addRange(punctuationSet, '\uFF08', '\uFF09'); // left and right parenthesis
        punctuationSet.add('\uFFE0'); // fullwidth cent sign
        punctuationSet.add('\uFE30'); // presentation form for vertical two dot leader

        // Fix: the fullwidth exclamation and question marks were never added, although
        // the old comment on the U+FF0C..U+FF0F range claimed that it covered them.
        punctuationSet.add('\uFF01'); // fullwidth exclamation mark
        punctuationSet.add('\uFF1F'); // fullwidth question mark
        // Fix: the old comments named these two characters, but the code added the
        // cent sign and the two dot leader instead. Both pairs are kept so the result
        // stays a superset of what earlier versions returned.
        punctuationSet.add('\uFF05'); // fullwidth percent sign
        punctuationSet.add('\uFF5E'); // fullwidth tilde

        // Half-width (ASCII) marks that routinely appear inside Chinese text.
        punctuationSet.add(',');
        punctuationSet.add('.');
        punctuationSet.add('?');
        punctuationSet.add('!');
        punctuationSet.add(':');
        punctuationSet.add(';');

        // General punctuation used by Chinese typography.
        punctuationSet.add('\u201C'); // left double quotation mark
        punctuationSet.add('\u201D'); // right double quotation mark
        punctuationSet.add('\u2018'); // left single quotation mark
        punctuationSet.add('\u2019'); // right single quotation mark
        punctuationSet.add('\u2014'); // em dash
        punctuationSet.add('\u2026'); // horizontal ellipsis
        punctuationSet.add('\u3001'); // ideographic comma

        // CJK symbols and punctuation.
        punctuationSet.add('\u3002'); // ideographic full stop
        punctuationSet.add('\u300A'); // left double angle bracket
        punctuationSet.add('\u300B'); // right double angle bracket
        punctuationSet.add('\u3014'); // left tortoise shell bracket
        punctuationSet.add('\u3015'); // right tortoise shell bracket
        // NOTE(review): U+3007 is the ideographic number zero, not punctuation; it is
        // kept only because earlier versions of this set contained it.
        punctuationSet.add('\u3007'); // ideographic number zero
        punctuationSet.add('\u3008'); // left angle bracket
        punctuationSet.add('\u3009'); // right angle bracket
        punctuationSet.add('\u203B'); // reference mark
        punctuationSet.add('\u301C'); // wave dash

        // Supplement.
        punctuationSet.add('\u00B7'); // middle dot

        return punctuationSet;
    }

    /**
     * Returns every printable ASCII character that is neither a letter, a digit nor
     * the space character.
     *
     * @return a new mutable set holding the 32 ASCII punctuation and symbol characters
     */
    public static Set<Character> getEnglishPunctuationSet() {
        Set<Character> punctuationSet = new HashSet<>();

        addRange(punctuationSet, 0x21, 0x2F); // !"#$%&'()*+,-./
        addRange(punctuationSet, 0x3A, 0x40); // :;<=>?@
        addRange(punctuationSet, 0x5B, 0x60); // [\]^_`
        addRange(punctuationSet, 0x7B, 0x7E); // {|}~

        return punctuationSet;
    }

    /**
     * Adds every character whose code unit lies in {@code [start, end]} to {@code set}.
     * Nothing is added when {@code start > end}.
     *
     * @param set   the set to add to
     * @param start first code unit, inclusive
     * @param end   last code unit, inclusive
     */
    private static void addRange(Set<Character> set, int start, int end) {
        for (int codeUnit = start; codeUnit <= end; codeUnit++) {
            set.add((char) codeUnit);
        }
    }

}